load packages

library(ggplot2)
library(readxl)
library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ stringr   1.5.1
## ✔ forcats   1.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(geojsonio)
## Registered S3 method overwritten by 'geojsonsf':
##   method        from   
##   print.geojson geojson
## 
## Attaching package: 'geojsonio'
## 
## The following object is masked from 'package:base':
## 
##     pretty

load dataset

df_descriptive=read_csv("filtered_merged_dataset_sample.csv")
## Rows: 10000 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (8): BORO, PERP_AGE_GROUP, PERP_SEX, PERP_RACE, VIC_AGE_GROUP, VIC_SEX...
## dbl  (10): INCIDENT_KEY, PRECINCT, Latitude, Longitude, Number_poverty, Perc...
## date  (1): OCCUR_DATE
## time  (1): OCCUR_TIME
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data_final <- read_csv("data_final.csv")
## Rows: 9820 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (17): BORO, LOC_OF_OCCUR_DESC, LOC_CLASSFCTN_DESC, LOCATION_DESC, PERP_...
## dbl  (15): INCIDENT_KEY, PRECINCT, JURISDICTION_CODE, X_COORD_CD, Y_COORD_CD...
## num   (2): Number_poverty, Number_education
## lgl   (3): STATISTICAL_MURDER_FLAG, Is_Holiday, Sky_Is_Dark
## dttm  (1): OCCUR_DATETIME
## date  (1): OCCUR_DATE
## time  (1): OCCUR_TIME
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Plot of the number of incidents in each borough for each year

# Summarize data: count the number of incidents by borough and year
incident_summary <- df_descriptive %>%
  group_by(BORO, Year) %>%
  summarise(Number_of_Incidents = n(), .groups = "drop") %>%
  # Ensure missing years and boroughs are included
  complete(BORO, Year = full_seq(min(df_descriptive$Year):max(df_descriptive$Year), 1), fill = list(Number_of_Incidents = 0))

# Create the bar plot
ggplot(incident_summary, aes(x = Year, y = Number_of_Incidents, fill = BORO)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Number of Incidents in Each Borough by Year",
    x = "Year",
    y = "Number of Incidents",
    fill = "Borough"
  ) +
  scale_x_continuous(breaks = seq(min(df_descriptive$Year), max(df_descriptive$Year), by = 1)) +
  theme_minimal()

##Total incidents per NTA

Load spatial data (replace with actual shapefile path)

nta_shape <- st_read("nynta2020_24d/nynta2020.shp")
## Reading layer `nynta2020' from data source 
##   `/Users/wangmingyin/Desktop/data science 1/nyc_shooting_final/nynta2020_24d/nynta2020.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 262 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 913175.1 ymin: 120128.4 xmax: 1067383 ymax: 272844.3
## Projected CRS: NAD83 / New York Long Island (ftUS)
cdta_shape = st_read("nycdta2020_24d/nycdta2020.shp")
## Reading layer `nycdta2020' from data source 
##   `/Users/wangmingyin/Desktop/data science 1/nyc_shooting_final/nycdta2020_24d/nycdta2020.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 71 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 913175.1 ymin: 120128.4 xmax: 1067383 ymax: 272844.3
## Projected CRS: NAD83 / New York Long Island (ftUS)
boro_shape = st_read("Borough Boundaries/geo_export_391a75ed-0ae4-4c88-8c30-3588c75bd01e.shp")
## Reading layer `geo_export_391a75ed-0ae4-4c88-8c30-3588c75bd01e' from data source `/Users/wangmingyin/Desktop/data science 1/nyc_shooting_final/Borough Boundaries/geo_export_391a75ed-0ae4-4c88-8c30-3588c75bd01e.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 5 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
# Prepare incident data: count incidents per NTA_clean
nta_incident_counts <- data_final %>%
  group_by(NTA) %>%
  summarise(Number_of_Incidents = n(), .groups = "drop")

# Merge spatial data with incident counts
nta_map_data <- nta_shape %>%
  left_join(nta_incident_counts, by = c("NTAName" = "NTA"))

# Create custom breaks for Number_of_Incidents
nta_map_data <- nta_map_data %>%
  mutate(
    Incident_Range = cut(
      Number_of_Incidents,
      breaks = seq(0, 400, by = 80),  # Breaks from 0 to 1000, every 200 cases
      labels = c("0-80", "81-160", "161-240", "241-320", "321-400"),
      include.lowest = TRUE
    )
  )

Plot the map

# Plot the map with custom ranges
ggplot(data = nta_map_data) +
  geom_sf(aes(fill = Incident_Range), color = "white", size = 0.2) +
    geom_sf_text(aes(label = Number_of_Incidents), size = 3, color = "black") +  # Add labels
  scale_fill_manual(
    values = c(
      "0-80" = "#b2e2e2",
      "81-160" = "skyblue",
      "161-240" = "#66c2a4",
      "241-320" = "#2ca25f",
      "321-400" = "#006d2c"
    ),
    name = "Number of Incidents"
  ) +
  labs(
    title = "Total Number of Incidents Across NYC NTAs from 2017 to 2023",
    subtitle = "Incidents grouped by range (0-400, 80 breaks)",
    caption = "Data Source: Your dataset"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )
## Warning: Removed 41 rows containing missing values or values outside the scale range
## (`geom_text()`).

unmatched_nta <- setdiff(nta_shape$NTAName, data_final$NTA)
unmatched_nta
##  [1] "Brooklyn Heights"                                        
##  [2] "Windsor Terrace-South Slope"                             
##  [3] "Fort Hamilton"                                           
##  [4] "Dyker Beach Park"                                        
##  [5] "Mapleton-Midwood (West)"                                 
##  [6] "Calvert Vaux Park"                                       
##  [7] "Holy Cross Cemetery"                                     
##  [8] "McGuire Fields"                                          
##  [9] "Jamaica Bay (West)"                                      
## [10] "Shirley Chisholm State Park"                             
## [11] "North & South Brother Islands"                           
## [12] "Hart Island"                                             
## [13] "The Battery-Governors Island-Ellis Island-Liberty Island"
## [14] "Stuyvesant Town-Peter Cooper Village"                    
## [15] "United Nations"                                          
## [16] "Rikers Island"                                           
## [17] "Astoria Park"                                            
## [18] "Sunnyside Yards (South)"                                 
## [19] "Calvary & Mount Zion Cemeteries"                         
## [20] "Mount Olivet & All Faiths Cemeteries"                    
## [21] "Middle Village Cemetery"                                 
## [22] "St. John Cemetery"                                       
## [23] "Rego Park"                                               
## [24] "Bay Terrace-Clearview"                                   
## [25] "Fort Totten"                                             
## [26] "Kissena Park"                                            
## [27] "Mount Hebron & Cedar Grove Cemeteries"                   
## [28] "Cunningham Park"                                         
## [29] "Spring Creek Park"                                       
## [30] "Douglaston-Little Neck"                                  
## [31] "Montefiore Cemetery"                                     
## [32] "Breezy Point-Belle Harbor-Rockaway Park-Broad Channel"   
## [33] "LaGuardia Airport"                                       
## [34] "Jamaica Bay (East)"                                      
## [35] "Jacob Riis Park-Fort Tilden-Breezy Point Tip"            
## [36] "Oakwood-Richmondtown"                                    
## [37] "Freshkills Park (South)"                                 
## [38] "Fort Wadsworth"                                          
## [39] "Hoffman & Swinburne Islands"                             
## [40] "Miller Field"                                            
## [41] "Great Kills Park"

summarize CDTA and BORO

## There is space between letter and number in CDTA, I deleted the space below
data_final$CDTA <- gsub(" ", "", data_final$CDTA)

cdta_incident_counts <- data_final %>%
  group_by(CDTA) %>%
  summarise(Number_of_Incidents = n(), .groups = "drop")

merge datasets

# Prepare incident data: count incidents per NTA_clean
cdta_incident_counts <- data_final %>%
  group_by(CDTA) %>%
  summarise(Number_of_Incidents = n(), .groups = "drop")

# Merge spatial data with incident counts
cdta_map_data <- cdta_shape %>%
  left_join(cdta_incident_counts, by = c("CDTA2020" = "CDTA"))

# Create custom breaks for Number_of_Incidents
cdta_map_data <- cdta_map_data %>%
  mutate(
    Incident_Range = cut(
      Number_of_Incidents,
      breaks = seq(0, 400, by = 80),  # Breaks from 0 to 1000, every 200 cases
      labels = c("0-80", "81-160", "161-240", "241-320", "321-400"),
      include.lowest = TRUE
    )
  )
# Plot the map with custom ranges
ggplot(data = cdta_map_data) +
  geom_sf(aes(fill = Incident_Range), color = "white", size = 0.2) +
  geom_sf_text(aes(label = Number_of_Incidents), size = 3, color = "black") +  # Add labels
  scale_fill_manual(
    values = c(
      "0-80" = "#b2e2e2",
      "81-160" = "skyblue",
      "161-240" = "#66c2a4",
      "241-320" = "#2ca25f",
      "321-400" = "#006d2c"
    ),
    name = "Number of Incidents"
  ) +
  labs(
    title = "Total Number of Incidents Across NYC CDTAs from 2017 to 2023",
    subtitle = "Incidents grouped by range (0-400, 80 breaks)",
    caption = "Data Source: Your dataset"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(), 
    axis.title.x = element_blank(),  # Remove x-axis label
    axis.title.y = element_blank()
  )
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_text()`).

# Count the boro incident
boro_incident_counts <- data_final %>%
  group_by(BORO) %>%
  summarise(Number_of_Incidents = n(), .groups = "drop")  %>%
  mutate(BORO = tolower(BORO) )

# Lowercase the boro in boro_shape
boro_shape = boro_shape %>%
  mutate(boro_name = tolower(boro_name))
 

# Merge spatial data with incident counts
boro_map_data <- boro_shape %>%
  left_join(boro_incident_counts, by = c("boro_name" = "BORO"))

# Create custom breaks for Number_of_Incidents
boro_map_data <- boro_map_data %>%
  mutate(
    Incident_Range = cut(
      Number_of_Incidents,
      breaks = seq(0, 4000, by = 800),  # Breaks from 0 to 4000, every 800 cases
      labels = c("0-800", "801-1600", "1601-2400", "2401-3200", "3201-4000"),
      include.lowest = TRUE
    )
  )
# Plot the map with custom ranges
ggplot(data = boro_map_data) +
  geom_sf(aes(fill = Incident_Range), color = "white", size = 0.2) +
  geom_sf_text(aes(label = Number_of_Incidents), size = 3, color = "black") +  # Add labels
  scale_fill_manual(
    values = c(
      "0-800" = "#b2e2e2",
      "801-1600" = "skyblue",
      "1601-2400" = "#66c2a4",
      "2401-3200" = "#2ca25f",
      "3201-4000" = "#006d2c"
    ),
    name = "Number of Incidents"
  ) +
  labs(
    title = "Total Number of Incidents Across NYC BOROs from 2017 to 2023",
    subtitle = "Incidents grouped by range (0-4000, 800 breaks)",
    caption = "Data Source: Your dataset"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    axis.title.x = element_blank(),  # Remove x-axis label
    axis.title.y = element_blank()
  )
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data

Plotly of boro map

boro_map_data <- boro_map_data %>%
  mutate(
    hover_text = paste("Borough:", boro_name, "<br>Total Incidents:", Number_of_Incidents)
  )

# Create the interactive plot with click functionality
plot <- plot_ly(
  data = boro_map_data,
  type = "scattermapbox",
  split = ~boro_name,  # Separate polygons by boroughs
  color = ~Number_of_Incidents,  # Color based on the number of incidents
  colors = "viridis",  # Use a color scale
  text = ~hover_text,  # Display hover text
  hoverinfo = "text",
  marker = list(size = 8, opacity = 0.7)
) %>%
  layout(
    title = "Total Number of Incidents Across NYC BOROs (2017-2023)",
    mapbox = list(
      style = "carto-positron",  # Base map style
      center = list(lon = -74.0060, lat = 40.7128),  # Center map on NYC
      zoom = 10
    )
  )

# Add click functionality to display the borough name and number of incidents
plot <- plot %>%
  event_register("plotly_click") %>%
  htmlwidgets::onRender("
    function(el, x) {
      el.on('plotly_click', function(d) {
        var point = d.points[0];
        var text = point.text;
        alert('You clicked on: ' + text);
      });
    }
  ")

# Display the interactive plot
plot
## No scattermapbox mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...

plotly for CDTA

# Prepare CDTA-level data
cdta_map_data <- cdta_map_data %>%
  mutate(
    hover_text = paste("CDTA:", CDTA2020, "<br>Total Incidents:", Number_of_Incidents)
  )
# Create the interactive plot with click functionality
plot <- plot_ly(
  data = cdta_map_data,
  type = "scattermapbox",
  split = ~CDTA2020,  # Separate polygons by boroughs
  color = ~Number_of_Incidents,  # Color based on the number of incidents
  colors = "viridis",  # Use a color scale
  text = ~hover_text,  # Display hover text
  hoverinfo = "text",
  marker = list(size = 8, opacity = 0.7)
) %>%
  layout(
    title = "Total Number of Incidents Across NYC CDTAs (2017-2023)",
    mapbox = list(
      style = "carto-positron",  # Base map style
      center = list(lon = -74.0060, lat = 40.7128),  # Center map on NYC
      zoom = 10
    )
  )

# Add click functionality to display the borough name and number of incidents
plot <- plot %>%
  event_register("plotly_click") %>%
  htmlwidgets::onRender("
    function(el, x) {
      el.on('plotly_click', function(d) {
        var point = d.points[0];
        var text = point.text;
        alert('You clicked on: ' + text);
      });
    }
  ")

# Display the interactive plot
plot
## No scattermapbox mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
# Remove any trailing spaces or mismatches in CDTA identifiers:
cdta_shape$CDTA2020 <- gsub(" ", "", cdta_shape$CDTA2020)
data_final$CDTA <- gsub(" ", "", data_final$CDTA)
# Identify Missing Matches
unmatched_cdta <- setdiff(cdta_shape$CDTA2020, data_final$CDTA)
print(unmatched_cdta)  # These are the CDTAs that are missing from the dataset
## [1] "QN80" "SI95" "QN84"
# Assign 0 to Missing Areas
cdta_incident_counts <- data_final %>%
  group_by(CDTA) %>%
  summarise(Number_of_Incidents = n(), .groups = "drop") %>%
  complete(CDTA = unique(cdta_shape$CDTA2020), fill = list(Number_of_Incidents = 0))
#Re-Merge the Data
cdta_map_data <- cdta_shape %>%
  left_join(cdta_incident_counts, by = c("CDTA2020" = "CDTA"))
# Update NA Handling
cdta_map_data <- cdta_map_data %>%
  mutate(
    Number_of_Incidents = ifelse(is.na(Number_of_Incidents), 0, Number_of_Incidents),
    Incident_Range = cut(
      Number_of_Incidents,
      breaks = seq(0, 600, by = 120), 
      labels = c("0-120", "121-240", "241-360", "361-480", "481-600"),
      include.lowest = TRUE
    )
  )
ggplot(data = cdta_map_data) +
  geom_sf(aes(fill = Incident_Range), color = "white", size = 0.2) +
 geom_sf_text(aes(label = Number_of_Incidents), size = 3, color = "black") +  # Add labels+
  scale_fill_manual(
    values = c(
      "0-120" = "#b2e2e2",
      "121-240" = "skyblue",
      "241-360" = "#66c2a4",
      "361-480" = "#2ca25f",
      "481-600" = "#006d2c"
    ),
    name = "Number of Incidents"
  ) +
  labs(
    title = "Total Number of Incidents Across NYC CDTAs from 2017 to 2023",
    subtitle = "Incidents grouped by range (0-600, 120 breaks)",
    caption = "Data Source: Your dataset"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

check unmatch CDTA

unmatched <- cdta_shape %>%
  filter(!CDTA2020 %in% data_final$CDTA)
print(unmatched)  # This shows unmatched areas
## Simple feature collection with 3 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 945159.7 ymin: 132138 xmax: 1049020 ymax: 225678
## Projected CRS: NAD83 / New York Long Island (ftUS)
##   BoroCode      BoroName CountyFIPS CDTA2020
## 1        4        Queens        081     QN80
## 2        5 Staten Island        085     SI95
## 3        4        Queens        081     QN84
##                                                      CDTAName CDTAType
## 1               QN80 LaGuardia Airport (JIA 80 Approximation)        1
## 2 SI95 Great Kills Park-Fort Wadsworth (JIA 95 Approximation)        1
## 3              QN84 Jamaica Bay (East) (JIA 84 Approximation)        1
##   Shape_Leng Shape_Area                       geometry
## 1   42122.47   30591949 MULTIPOLYGON (((1019455 225...
## 2   80517.91   44747671 MULTIPOLYGON (((951604.1 13...
## 3  239900.06  122558581 MULTIPOLYGON (((1022140 148...
# Filter out borough
boroughs <- unique(cdta_map_data$BoroName)
for (b in boroughs) {
    borough_data <- cdta_map_data %>%
        filter(BoroName == b)
    plot <- ggplot(data = borough_data) +
        geom_sf(aes(fill = Number_of_Incidents), color = "black") +
      geom_sf_text(aes(label = Number_of_Incidents), size = 3, color = "white") +  # Add labels+
        scale_fill_gradientn(
      colors = c("blue", "green", "yellow", "red"), # Custom color scale
      name = "Number of Incidents"
    ) +
        labs(
            title = paste("CDTA Incidents in", b),
            subtitle = "2017 to 2023",
            x = "Longitude",
            y = "Latitude"
        ) +
        theme_minimal()
    print(plot)  # Move inside the loop
}